{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 案例：判断一封邮件是否是垃圾邮件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.io as sio\n",
    "from sklearn.svm import SVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# training data\n",
    "data1 = sio.loadmat('data/spamTrain.mat')\n",
    "X, y = data1['X'], data1['y']\n",
    " \n",
    "# Testing data\n",
    "data2 = sio.loadmat('data/spamTest.mat')\n",
    "Xtest, ytest = data2['Xtest'], data2['ytest']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((4000, 1899), (4000, 1))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape,y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       ...,\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 1, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [1],\n",
       "       [0],\n",
       "       ...,\n",
       "       [1],\n",
       "       [0],\n",
       "       [0]], dtype=uint8)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.99 0.03\n"
     ]
    }
   ],
   "source": [
    "Cvalues = [3, 10, 30, 100,0.01, 0.03, 0.1, 0.3,1 ] \n",
    "\n",
    "best_score = 0\n",
    "best_param = 0\n",
    "\n",
    "for c in Cvalues:\n",
    "    svc = SVC(C=c,kernel='linear')\n",
    "    svc.fit(X,y.flatten())\n",
    "    score= svc.score(Xtest,ytest.flatten())\n",
    "    if score > best_score:\n",
    "        best_score = score\n",
    "        best_param = c\n",
    "print(best_score,best_param)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "svc = SVC(0.03,kernel='linear')\n",
    "svc.fit(X,y.flatten())\n",
    "score_train= svc.score(X,y.flatten())\n",
    "score_test= svc.score(Xtest,ytest.flatten())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.99425 0.99\n"
     ]
    }
   ],
   "source": [
    "print(score_train,score_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
